# Project Information -----------------------
# File Name: Replication_Frank_etal_plos.r
# Version: R 4.1.1
# Authors: Lauren Frank, Natasha Gordon, Don Green
# Dependencies: none
# Machine: macOS 14.7

# Setup -----------------------

# load and install required packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, haven, gmodels, sandwich, janitor, stats, gt, webshot2, 
               stargazer, dotwhisker)

rm(list=ls())

#setwd("~/Dropbox/Research/Green Lab/Kenya Edutainment/Kenya_edutainment")

# full endline data set
kenya_data <- readxl::read_excel("Kenya Edutainment 2023 Endline Data.xlsx")

# rename variable(s)
kenya_data <- kenya_data %>%
    rename(expgroup = "exp group")

#Create treatment conditions for full data
kenya_data$treatment <- ""
kenya_data <- kenya_data %>% mutate(treatment = case_when(
    expgroup == "Treatment 1" ~ "N*GEN",
    expgroup == "Treatment 2" ~ "Love&Wlth",
    expgroup == "Control" ~ "Control"
))

# Table 1: Balance check  -----------------------
# labels = Control, N*Gen, Love&Wlth

# binarize treatments
kenya_data <- kenya_data %>% mutate(bin_treat = case_when(
    treatment == "Control" ~ 1,
    treatment == "N*GEN" ~ 2,
    treatment == "Love&Wlth" ~ 3))

# gender
b_gender <- c(summary(kenya_data$gender[kenya_data$bin_treat==1])[4],
              summary(kenya_data$gender[kenya_data$bin_treat==2])[4],
              summary(kenya_data$gender[kenya_data$bin_treat==3])[4],
              summary(aov(gender~bin_treat,kenya_data))[[1]][["Pr(>F)"]][1])

# age
b_age <- c(summary(kenya_data$age[kenya_data$bin_treat==1])[4],
           summary(kenya_data$age[kenya_data$bin_treat==2])[4],
           summary(kenya_data$age[kenya_data$bin_treat==3])[4],
           summary(aov(age~bin_treat,kenya_data))[[1]][["Pr(>F)"]][1])

# education
b_education <- c(summary(kenya_data$education[kenya_data$bin_treat==1])[4],
                 summary(kenya_data$education[kenya_data$bin_treat==2])[4],
                 summary(kenya_data$education[kenya_data$bin_treat==3])[4],
                 summary(aov(education~bin_treat,kenya_data))[[1]][["Pr(>F)"]][1])

# occupation
b_occ <- c(summary(kenya_data$occupation[kenya_data$bin_treat==1])[4],
           summary(kenya_data$occupation[kenya_data$bin_treat==2])[4],
           summary(kenya_data$occupation[kenya_data$bin_treat==3])[4],
           summary(aov(occupation~bin_treat,kenya_data))[[1]][["Pr(>F)"]][1])

# num children
b_children <- c(summary(kenya_data$children[kenya_data$bin_treat==1])[4],
                summary(kenya_data$children[kenya_data$bin_treat==2])[4],
                summary(kenya_data$children[kenya_data$bin_treat==3])[4],
                summary(aov(children~bin_treat,kenya_data))[[1]][["Pr(>F)"]][1])


bal_test <- as.data.frame(rbind(b_gender, b_age, b_education, b_occ, b_children))
names(bal_test) <- c("Control", "N*GEN", "Love & Wealth", "p-value")
bal_test <- bal_test %>%
    round(digits = 2) %>%
    mutate(Covariate = c("Gender", "Age", "Education", "Occupation", "Num. of children"),
           .before = "Control")

# table 1
table1 <- bal_test %>%
    gt() %>%
    opt_table_lines() %>%
    tab_header(title = "Covariate balance across treatment groups") %>%
    tab_spanner(label = "Treatment", columns = c("Control", "N*GEN", "Love & Wealth"))

#table1 %>% gtsave("table1.tex")

# Table 2: Knowledge index -----------------------

# recode answer variables
kenya_data$know_humans <- ifelse(kenya_data$humans == 1, 1, 0)
kenya_data$know_witch <- ifelse(kenya_data$fall_ill == 2, 1, 0)
kenya_data$know_water <- ifelse(kenya_data$clean_water == 1, 1, 0)
kenya_data$know_wash <- ifelse(kenya_data$wash_hands == 1, 1, 0)
kenya_data$know_mosquito <- ifelse(kenya_data$mosquitoes == 1, 1, 0)
kenya_data$know_brain <- ifelse(kenya_data$brain_control == 2, 1, 0)

# create index
kenya_data <- kenya_data %>%
    mutate(know_index = know_humans + know_witch + know_water + know_wash 
           + know_mosquito + know_brain)

know_sum_df <- kenya_data %>%
    dplyr::select(treatment, know_index) %>%
    group_by(treatment) %>%
    tabyl(know_index, treatment) %>%
    adorn_title("top")

# format knowledge table
colnames(know_sum_df) <- know_sum_df[1, ]
know_table <- know_sum_df %>% 
    slice(-1) %>%
    mutate(across(everything(), as.numeric)) %>%
    mutate(know_index = "") %>%
    mutate(across(c("Control", "Love&Wlth", "N*GEN"), ~ paste0("(", ., ")")))

# format percentage table
percents <- know_sum_df %>%
    slice(-1) %>%
    mutate(across(everything(), as.numeric)) %>%
    mutate(across(c("Control", "Love&Wlth", "N*GEN"), ~ (. / sum(.)) * 100)) %>%
    mutate(know_index = c(1, 2, 3, 4, 5, 6)) %>%
    round(digits = 2)

# combine
science_knowledge <- as.data.frame(
    mapply(function(col1, col2) {
        paste(col1, col2, sep = " ")
    }, percents, know_table, SIMPLIFY = FALSE)
)

# add totals
science_knowledge <- science_knowledge %>%
    add_row(know_index = "Total", Control = "315",
            Love.Wlth = "318",
            N.GEN = "310")

# table 2
table2 <- science_knowledge %>%
    rename("Number of Correct Answers" = know_index, "Love & Wealth" = Love.Wlth,
           "N*GEN" = N.GEN) %>%
    gt() %>%
    opt_table_lines() %>%
    tab_header(
        title = "Table 2: Scientific Knowledge Index, by Treatment Assignment"
    ) %>%
    tab_spanner(label = "Treatment", columns = c("Control", "Love & Wealth", "N*GEN")) %>%
    tab_source_note(source_note = "n = 943. \n Table rows show subjects by their number 
                    of correct answers to the six knowledge index questions. No respondent
                    answered zero questions correctly. Table entries are column percentages 
                    (within each experimental group) and the N in parentheses. See Appendix 
                    for detailed question wording.")

#table2 %>% gtsave("table2.tex")

# Table 3: Values index -----------------------

# recode answer variables
kenya_data$ans_global_threat <- ifelse(kenya_data$global_threat == 1, 1, 0)
kenya_data$ans_study_science <- ifelse(kenya_data$study_science  == 1, 1, 0)
kenya_data$ans_scientific_research <- ifelse(kenya_data$scientific_research  == 1, 1, 0)
kenya_data$ans_potential_impact <- ifelse(kenya_data$potential_impact  == 5, 1, 0)
kenya_data$ans_change_scientist <- ifelse(kenya_data$`change-scientist` == 5, 1, 0)
kenya_data$ans_children_science <- ifelse(kenya_data$children_science == 5, 1, 0)

# create index
kenya_data <- kenya_data %>%
    mutate(values_index = ans_global_threat + ans_study_science + ans_scientific_research
           + ans_potential_impact + ans_change_scientist + ans_children_science)

values_sum_df <- kenya_data %>%
    dplyr::select(treatment, values_index) %>%
    group_by(treatment) %>%
    tabyl(values_index, treatment) %>%
    adorn_title("top")

# format values table
colnames(values_sum_df) <- values_sum_df[1, ]
values_table <- values_sum_df %>% 
    slice(-1) %>%
    mutate(across(everything(), as.numeric)) %>%
    mutate(values_index = "") %>%
    mutate(across(c("Control", "Love&Wlth", "N*GEN"), ~ paste0("(", ., ")")))

values_stats_control <- lapply(values_table, function(x) gsub("\\((\\d+)\\)", "\\1", x))
values_stats_control <- values_stats_control %>%
    as.data.frame() %>%
    summarize(mean = mean(Control), sd = sd(Control))

# format percentage table
percents <- values_sum_df %>%
    slice(-1) %>%
    mutate(across(everything(), as.numeric)) %>%
    mutate(across(c("Control", "Love&Wlth", "N*GEN"), ~ (. / sum(.)) * 100)) %>%
    mutate(values_index = c(0, 1, 2, 3, 4, 5, 6)) %>%
    round(digits = 2)

# combine
values_df <- as.data.frame(
    mapply(function(col1, col2) {
        paste(col1, col2, sep = " ")
    }, percents, values_table, SIMPLIFY = FALSE)
)

# add totals
values_df <- values_df %>%
    add_row(values_index = "Total", Control = "100% (315)",
            Love.Wlth = "100% (318)",
            N.GEN = "100% (310)")

# table 3
table3 <- values_df %>%
    rename("Number of Correct Answers" = values_index, "Love & Wealth" = Love.Wlth,
           "N*GEN" = N.GEN) %>%
    gt() %>%
    opt_table_lines() %>%
    tab_header(
        title = "Values & Priorities Index, by Treatment Assignment"
    ) %>%
    tab_spanner(label = "Treatment", columns = c("Control", "Love & Wealth", "N*GEN")) %>%
    tab_source_note(source_note = "Note: Table rows show subjects by their number of answers 
                    in the predicted direction to the seven values and priorities 
                    index questions. The second number in each cell is the column 
                    percentage within each experimental group.")

#table3 %>% gtsave("table3.tex")

# Table 4: Gender-related attitudes -----------------------

## GEMS scale -----------------------
# sex, men_readysex, women_tolerateviolence, couple_childecision, woman_role, man_decisionsmaker
# No problem with missing data

# recode variable
kenya_data$couple_childecision <- case_when(kenya_data$couple_childecision == 5 ~ 1,
                                             kenya_data$couple_childecision == 4 ~ 2,
                                             kenya_data$couple_childecision == 3 ~ 3,
                                             kenya_data$couple_childecision == 2 ~ 4,
                                             kenya_data$couple_childecision == 1 ~ 5,
                                             TRUE ~ NA)

# create index
kenya_data$ans_attitude_mean <- rowMeans(kenya_data[, c("sex", "men_readysex",
                                                        "women_tolerateviolence",
                                                        "couple_childecision",
                                                        "woman_role", "man_decisionsmaker")])

gems <- kenya_data %>% select(sex, men_readysex, women_tolerateviolence, couple_childecision, woman_role, man_decisionsmaker)

gems_table <- kenya_data %>%
    group_by(treatment) %>%
    summarize(n = n(), 
              mean = mean(ans_attitude_mean), 
              sd = sd(ans_attitude_mean),
              se = sd / sqrt(n)) %>%
    mutate(group = "GEMS")

## GNAS Male scale -----------------------
# sons_haveeducation, daughters_sentschool, son_lookafterparents, limited_amount, father_health, 
# daughters_work, daughters_chanceassons, important_reason, daughter_supportherself

gnas <- kenya_data %>% select(sons_haveeducation, daughters_sentschool, son_lookafterparents, limited_amount, father_health, 
               daughters_work, daughters_chanceassons, important_reason, daughter_supportherself)

# recode variables
kenya_data$daughters_work <- case_when(kenya_data$daughters_work == 5 ~ 1,
                                            kenya_data$daughters_work == 4 ~ 2,
                                            kenya_data$daughters_work == 3 ~ 3,
                                            kenya_data$daughters_work == 2 ~ 4,
                                            kenya_data$daughters_work == 1 ~ 5,
                                            TRUE ~ NA)

kenya_data$daughters_chanceassons <- case_when(kenya_data$daughters_chanceassons == 5 ~ 1,
                                       kenya_data$daughters_chanceassons == 4 ~ 2,
                                       kenya_data$daughters_chanceassons == 3 ~ 3,
                                       kenya_data$daughters_chanceassons == 2 ~ 4,
                                       kenya_data$daughters_chanceassons == 1 ~ 5,
                                       TRUE ~ NA)

kenya_data$daughter_supportherself <- case_when(kenya_data$daughter_supportherself == 5 ~ 1,
                                               kenya_data$daughter_supportherself == 4 ~ 2,
                                               kenya_data$daughter_supportherself == 3 ~ 3,
                                               kenya_data$daughter_supportherself == 2 ~ 4,
                                               kenya_data$daughter_supportherself == 1 ~ 5,
                                               TRUE ~ NA)

# create index
kenya_data$ans_gnas_mean <- rowMeans(kenya_data[, c("sons_haveeducation", "daughters_sentschool",
                                                    "son_lookafterparents",
                                                    "limited_amount",
                                                    "father_health", "daughters_work",
                                                    "daughters_chanceassons", "important_reason",
                                                    "daughter_supportherself")])

# gender = 1 if female / 2 if male
gnas_male_table <- kenya_data %>%
    filter(gender == 2) %>%
    group_by(treatment) %>%
    summarize(n = n(), 
              mean = mean(ans_gnas_mean), 
              sd = sd(ans_gnas_mean),
              se = sd / sqrt(n)) %>%
    mutate(group = "GNAS Male") 
## GNAS Female scale -----------------------
gnas_female_table <- kenya_data %>%
    filter(gender == 1) %>%
    group_by(treatment) %>%
    summarize(n = n(), 
              mean = mean(ans_gnas_mean), 
              sd = sd(ans_gnas_mean),
              se = sd / sqrt(n)) %>%
    mutate(group = "GNAS Female") 

# table 4
table <- bind_rows(gems_table, gnas_male_table, gnas_female_table)

table4 <- table %>%
    mutate(across(c("mean", "sd", "se"), round, 2)) %>%
    rename("Treatment" = treatment, "Mean" = mean, "Std. Deviation" = sd, "Std. Error" = se) %>%
    group_by(group) %>%
    gt(groupname_col = "group", row_group_as_column = TRUE) %>%
    opt_table_lines() %>%
    tab_header(title = "Gender-Related Attitudes by Assigned Treatment")

#table4 %>% gtsave("table4.tex")

# Table 5: Sexual health attitudes -----------------------
# test_often
kenya_data$ans_test_often <- ifelse(kenya_data$test_often == 1, 1, 0)
kenya_data$ans_test_mean <- rowMeans(kenya_data[, c("ans_test_often")])

testing_table <- kenya_data %>%
    group_by(treatment) %>%
    summarise(n = n(), 
              mean = mean(ans_test_mean), 
              sd = sd(ans_test_mean),
              se = sd / sqrt(n)) %>%
    mutate(group = "Sexually-active people should be tested for HIV")

# sexually_activemen
kenya_data$ans_sexually_activemen <- ifelse(kenya_data$sexually_activemen == 1, 1, 0)
kenya_data$ans_condom_mean <- rowMeans(kenya_data[, c("ans_sexually_activemen")])

condom_table <- kenya_data %>%
    group_by(treatment) %>%
    summarise(n = n(), 
              mean = mean(ans_condom_mean), 
              sd = sd(ans_condom_mean),
              se = sd / sqrt(n)) %>%
    mutate(group = "Sexually-active men should use condoms")

# choice_women
kenya_data$ans_choice_women <- ifelse(kenya_data$choice_women == 1, 1, 0)
kenya_data$ans_choice_mean <- rowMeans(kenya_data[, c("ans_choice_women")])

choice_table <- kenya_data %>%
    group_by(treatment) %>%
    summarise(n = n(), 
              mean = mean(ans_choice_mean), 
              sd = sd(ans_choice_mean),
              se = sd / sqrt(n)) %>%
    mutate(group = "Women can choose when to start a family")

### table 5
table <- bind_rows(testing_table, condom_table, choice_table)

table5 <- table %>%
    mutate(across(c("mean", "sd", "se"), round, 2)) %>%
    rename("Treatment" = treatment, "Mean" = mean, "Std. Deviation" = sd, "Std. Error" = se) %>%
    group_by(group) %>%
    gt(groupname_col = "group", row_group_as_column = TRUE) %>%
    opt_table_lines() %>%
    tab_header(title = "Sexual Health Attitudes by Assigned Treatment") %>%
    tab_source_note(source_note = "Note: numbers show proportion of group who
                    agree with the statement.")

#table5 %>% gtsave("table5.tex")

# Table 6: Participant enjoyment -----------------------
# show_relevance, relevance_community, show_applicable, enjoy_show, watch_again, more_episodes

# create indices
kenya_data$ans_relevant <- rowMeans(kenya_data[, c("show_relevance", "relevance_community",
                                                   "show_applicable")])
kenya_data$ans_enjoy <- rowMeans(kenya_data[, c("enjoy_show", "watch_again",
                                                "more_episodes")])

# relevance table
relevance_table <- kenya_data %>%
    filter(!is.na(ans_relevant)) %>%
    group_by(treatment) %>%
    summarize(n = n(), 
              mean = mean(ans_relevant), 
              sd = sd(ans_relevant),
              se = sd / sqrt(n)) %>%
    mutate(group = "Relevance of Show")

# enjoyment table
enjoyment_table <- kenya_data %>%
    filter(!is.na(ans_relevant)) %>%
    group_by(treatment) %>%
    summarize(n = n(), 
              mean = mean(ans_enjoy), 
              sd = sd(ans_enjoy),
              se = sd / sqrt(n)) %>%
    mutate(group = "Enjoyment from Show")

# full table 6
table <- bind_rows(relevance_table, enjoyment_table)

table6 <- table %>%
    mutate(across(c("mean", "sd", "se"), round, 2)) %>%
    rename("Treatment" = treatment, "Mean" = mean, "Std. Deviation" = sd, "Std. Error" = se) %>%
    group_by(group) %>%
    gt(groupname_col = "group", row_group_as_column = TRUE) %>%
    opt_table_lines() %>%
    tab_header(title = "Participant Ratings of Program Relevance and Enjoyment") %>%
    tab_source_note(source_note = "Note: table shows data on only those subjects who
                    watched at least one program.")

#table6 %>% gtsave("table6.tex")


# Appendix tables and figures -----------------------

## Table A1: Scientific knowledge by treatment (regression)
a1 <- lm(kenya_data$know_index ~ kenya_data$treatment)
names(a1$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

## Table A2: Values index by treatment (regression)
a2 <- lm(kenya_data$values_index ~ kenya_data$treatment)
names(a2$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

## Table B1: Gender attitudes (GEMS, regression)
b1 <- lm(kenya_data$ans_attitude_mean ~ kenya_data$treatment)
names(b1$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Table B2: Gender attitudes (GNAS, male, regression)
male_sample <- kenya_data %>% filter(gender == 2)
b2 <- lm(male_sample$ans_gnas_mean ~ male_sample$treatment)
names(b2$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Table B3: Gender attitudes (GNAS, female, regression)
female_sample <- kenya_data %>% filter(gender == 1)
b3 <- lm(female_sample$ans_gnas_mean ~ female_sample$treatment)
names(b3$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Table C1 - Sexual health attitudes (regression #1)
c1 <- lm(kenya_data$ans_test_mean ~ kenya_data$treatment)
names(c1$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Table C2 - Sexual health attitudes (regression #1)
c2 <- lm(kenya_data$ans_condom_mean ~ kenya_data$treatment)
names(c2$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Table C3 - Sexual health attitudes (regression #1)
c3 <- lm(kenya_data$ans_choice_mean ~ kenya_data$treatment)
names(c3$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Table D1 - Relevance (regression)
d1 <- lm(kenya_data$ans_relevant ~ kenya_data$treatment)
names(d1$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Table D2 - Participant enjoyment (regression)
d2 <- lm(kenya_data$ans_enjoy ~ kenya_data$treatment)
names(d2$coefficients) <- c("Intercept", "Love&Wlth", "N*GEN")

# Tables
table_a1 <- stargazer(a1, a2)
# writeLines(table_a1, "table_a1.tex")
table_a2 <- stargazer(b1, b2, b3, dep.var.labels = c("GEMS", "GNAS-Male", "GNAS-Female"))
table_a3 <- stargazer(c1, c2, c3, dep.var.labels = c("Testing", "Condom use", "Woman's Choice")) 
table_a4 <- stargazer(d1, d2, dep.var.labels = c("Relevance", "Enjoyment"))




